**************************************************************
*HURRICANES AND GAS GOUGING - COMPILE AND UPDATE RETAIL PRICE DATA 
**************************************************************
frame create retail
frame change retail
set more off

*************************
*Distance to nearest competitor
forvalues y = 2004(1)2008 {
use $data_clean\WORKING_MAINDATA.dta, clear
 
keep FuellocationID GasBrand Name Address City MailingZip StoreBrand ///
     CorporateBrand rt_lat rt_lng year
gduplicates drop FuellocationID rt_lat rt_lng Name year, force
gsort FuellocationID -Name
by FuellocationID: carryforward Name, replace
keep if year==`y'
gduplicates drop FuellocationID rt_lat rt_lng, force
gen name=strlower(Name)
drop Name
replace name = subinstr(name, " ", "", .) 
replace name = subinstr(name, "-", "",.) 

local stats shell citgo chevron bp mobil exxon sunoco hess texaco marathon ///
            murphy valero conoco gulf phillips66 eleven kangaroo circlek ///
			racetrac raceway gate tomthumb samsclub petrosouth fina ///
			cumberland pilot liberty bj flashfoods costco clark rallystores ///
			ezmart loves dodgeschicken pure albertsons flyingj coastal ///
			quickfuel travelcentersofamerica breaktime diamondshamrock ///
			freedomoil kroger mapco westar aafes pacpride petrostop stopngo ///
			usagasoline walmart faststop freedomvalu gascity kwikshop wawa ///
			wagapack wacofood upakit townstar tobaccoplus tigertrax thorntons ///
			tcfuels swiftys sunshine sunrisefood suncoastoil speedway shoprite ///
			shopalott savaton sfoodstore ritz reboil rally quickking ///
			prestofoodstore popingo perfectpetroleum twinoilcompany  pantry ///
			ottos nowsave nuggetstore martinfoodmart kwikpantry kwikking ///
			kands jubilee jrfoodstore joyfoodstore johnsonandjohnson ///
			johnscurbmarket jiffy jrfoodmart islandfoodstore inland ///
			ajpetroleum allamericanoil amerika aplusminimarket baron ///
			beelinestore beststop birdies blaylockoil brothersfoodmart ///
			centraloilandsupply centralstation clayoilcorporation cmsi ///
			completestop cornerstore crackerbarrel dailysconveniencestore ///
			deltafoodmart deltaminimart diamondoil dionsquikmart discountzone ///
			dixiemart duhonbrosoil economart elonfoodmart expressfuels ///
			expresslane expressmart exprezit ezserve fasttrack firstcoastenergy ///
			foodfast foodnfun gaskwick giantoil handyfoodstore ///
			handyway kwikstop onestop orionfuels quickstop ///
			qwikking rainbowfoodmart rebofflorida rightwayfoods rocketfuel ///
			royalpetroleum shortstop starenterprise stopnsave superstop ugas  
			
gen store_name=""
foreach lname of local stats {
	gen `lname' = 0
	egen tot=rowtotal(shell-`lname')
	replace `lname'=1 if strpos(name, "`lname'") & tot==0
	replace store_name="`lname'" if strpos(name, "`lname'") & tot==0
	disp "`lname'"
	drop tot  
}
egen tot=rowtotal(shell-ugas)
encode store_name, gen(st_name)
replace st_name=_n+150 if missing(st_name) //Unique store id 
drop shell-ugas GasBrand StoreBrand CorporateBrand name

sort FuellocationID
gen id_dist=_n
scalar num_st=_N

gen km_to_nearestID_df=9999
gen nearestID_df=.
gen nearestID_df_name=.
gen within_1km_df=0
gen within_3km_df=0
gen within_5km_df=0
gen within_10km_df=0

forvalues j=1/`=num_st' {
 	replace rt_lat=round(rt_lat,0.0000001)
	replace rt_lng=round(rt_lng,0.0000001)
	set type double 
	gen lat_end=rt_lat[`j']
		replace lat_end=. in `j'
	gen lon_end=rt_lng[`j']
		replace lon_end=. in `j'
	gen st_name_i=st_name[`j']
	gen FuellocationID_i=FuellocationID[`j']
	geodist lat lon rt_lat rt_lng , gen(dist) 
	replace dist=round(dist,0.0001)
	replace nearestID_df=FuellocationID_i if dist<km_to_nearestID_df & dist!=. & st_name!=st_name_i	
	replace nearestID_df_name=st_name_i if dist<km_to_nearestID_df & dist!=. & st_name!=st_name_i		
	replace km_to_nearestID_df=dist if dist<km_to_nearestID_df & dist!=. & st_name!=st_name_i

	replace within_1km_df=within_1km_df+1 if dist<=1 & st_name!=st_name_i
	replace within_3km_df=within_3km_df+1 if dist<=1 & st_name!=st_name_i
	replace within_5km_df=within_5km_df+1 if dist<=1 & st_name!=st_name_i
	replace within_10km_df=within_10km_df+1 if dist<=1 & st_name!=st_name_i

	drop lat_end lon_end st_name_i FuellocationID_i dist
}
*
keep FuellocationID km_to_nearestID_df nearestID_df nearestID_df_name ///
     within_1km_df within_3km_df within_5km_df within_10km_df st_name
gen year=`y'
save $data_offline\dist_`y', replace
}

use $data_offline\dist_2004, replace
forvalues y = 2005(1)2008 {
	append using $data_offline\dist_`y'
}
sort FuellocationID year
save $data_offline\dist_main, replace


*************************
*Distance to nearest wholesale racks
use $data_clean\WORKING_MAINDATA.dta, clear
 
keep FuellocationID rt_lat rt_lng 
gduplicates drop FuellocationID rt_lat rt_lng, force
sort FuellocationID
geonear  FuellocationID rt_lat rt_lng using $data_clean\wholesale_loc_20191023, ///
	neighbors(rack_id rack_latitude rack_longitude) near(3)
keep FuellocationID nid1-km_to_nid3
rename nid1 nearRack1
rename km_to_nid1 km_to_nearRack1
rename nid2 nearRack2
rename km_to_nid2 km_to_nearRack2
rename nid3 nearRack3
rename km_to_nid3 km_to_nearRack3
save $data_offline\dist_rack, replace
	

***********************************************
*Constructing full wholesale-retail dataset
use $data_clean\WORKING_MAINDATA.dta, clear
drop GasBrand-within_10km StoreBrand CorporateBrand 
drop nearRack-gas_whole3_pre 
compress 
xtset FuellocationID date
tsfill, full
keep if inrange(month,6,10) 

*Carryforward variables characteristics
gsort FuellocationID -Name
by FuellocationID: carryforward Name, replace
by FuellocationID: carryforward Address, replace
by FuellocationID: carryforward City, replace
by FuellocationID: carryforward MailingZip, replace
gen name=strlower(Name)
drop Name
replace name = subinstr(name, " ", "", .) 
replace name = subinstr(name, "-", "",.) 

merge m:1 FuellocationID year using $data_offline\dist_main
drop _merge


*************************
*Correcting brand and retailer indicators
gen br=""
gen ret=""
local b shell citgo chevron bp mobil exxon sunoco hess texaco marathon ///
            murphy valero conoco gulf 
foreach lname of local b {
	disp "`lname'"
	gen `lname' = 0
	egen tot=rowtotal(shell-`lname')
	replace `lname'=1 if strpos(name, "`lname'") & tot==0
	replace br="`lname'" if strpos(name, "`lname'") & tot==0
	drop tot  
}
local r eleven circlek caseys pantry1 pantry2 pantry6 kangarooexpress ///
		speedway kroger pilot quicktrip wawa cumberland holiday mapco ///
		racetrac stewart loves pioneer ezmart timewise sunshine hasty ///
		convenientfoodmart lehigh flyingj gate thorntons parker gogas ///
		flashfoods raceway aafes giant gomart    			
foreach lname of local r {
	gen `lname' = 0
	egen tot=rowtotal(shell-`lname')
	replace `lname'=1 if strpos(name, "`lname'") & tot==0
	replace ret="`lname'" if strpos(name, "`lname'") & tot==0
	drop tot  
}
gen brand_maj=0
	replace brand_maj=1 if !missing(br)
replace br="unbranded" if missing(br)
encode br, gen(branded)			
drop br
gen ret_maj=0
	replace ret_maj=1 if !missing(ret)
replace ret="nonmajor" if missing(ret)
encode ret, gen(retailer)
drop ret
drop shell-gomart


*************************
*Merging in distance to nearest competitor
merge m:1 FuellocationID year using $data_offline\dist_main 
drop _merge


*************************
*Merging wholesale price data
merge m:1 FuellocationID using $data_offline\dist_rack
drop _merge
decode branded, gen(brand_str)
save $data_offline\WORKING_MAINDATA_gabe, replace

*Wholesale prices rack 1
use $data_clean\wholesale_clean_20191023, clear
rename city rack_city
rename state rack_state
rename rack_id nearRack1
merge 1:m nearRack1 date using $data_offline\WORKING_MAINDATA_gabe
drop if _merge==1
drop _merge
gen whole_rack1=wholesale 
	replace whole_rack1 = whole_shell if !missing(whole_shell) & brand_str=="shell"
	replace whole_rack1 = whole_citgo if !missing(whole_citgo) & brand_str=="citgo"
	replace whole_rack1 = whole_bp if !missing(whole_bp) & brand_str=="bp"
	replace whole_rack1 = whole_sunoco if !missing(whole_sunoco) & brand_str=="sunoco"
	replace whole_rack1 = whole_texaco if !missing(whole_texaco) & brand_str=="texaco"
	replace whole_rack1 = whole_murphy if !missing(whole_murphy) & brand_str=="murphy"
	replace whole_rack1 = whole_conoco if !missing(whole_conoco) & brand_str=="conoco"
	replace whole_rack1 = whole_valero if !missing(whole_valero) & brand_str=="valero"
	replace whole_rack1 = whole_chevron if !missing(whole_chevron) & brand_str=="chevron"
	replace whole_rack1 = whole_marathon if !missing(whole_marathon) & brand_str=="marathon"
	replace whole_rack1 = whole_hess if !missing(whole_hess) & brand_str=="hess"
gen nearRack1_city=rack_city
gen nearRack1_st=rack_state
drop rack_city-rack_longitude wholesale-whole_hess
save $data_offline\WORKING_MAINDATA_gabe, replace

*Wholesale prices rack 2
use $data_clean\wholesale_clean_20191023, clear
rename city rack_city
rename state rack_state
rename rack_id nearRack2
merge 1:m nearRack2 date using $data_offline\WORKING_MAINDATA_gabe
drop if _merge==1
drop _merge
gen whole_rack2=wholesale 
	replace whole_rack2 = whole_shell if !missing(whole_shell) & brand_str=="shell"
	replace whole_rack2 = whole_citgo if !missing(whole_citgo) & brand_str=="citgo"
	replace whole_rack2 = whole_bp if !missing(whole_bp) & brand_str=="bp"
	replace whole_rack2 = whole_sunoco if !missing(whole_sunoco) & brand_str=="sunoco"
	replace whole_rack2 = whole_texaco if !missing(whole_texaco) & brand_str=="texaco"
	replace whole_rack2 = whole_murphy if !missing(whole_murphy) & brand_str=="murphy"
	replace whole_rack2 = whole_conoco if !missing(whole_conoco) & brand_str=="conoco"
	replace whole_rack2 = whole_valero if !missing(whole_valero) & brand_str=="valero"
	replace whole_rack2 = whole_chevron if !missing(whole_chevron) & brand_str=="chevron"
	replace whole_rack2 = whole_marathon if !missing(whole_marathon) & brand_str=="marathon"
	replace whole_rack2 = whole_hess if !missing(whole_hess) & brand_str=="hess"
gen nearRack2_city=rack_city
gen nearRack2_st=rack_state
drop rack_city-rack_longitude wholesale-whole_hess  
save $data_offline\WORKING_MAINDATA_gabe, replace

*Wholesale prices rack 3
use $data_clean\wholesale_clean_20191023, clear
rename city rack_city
rename state rack_state
rename rack_id nearRack3
merge 1:m nearRack3 date using $data_offline\WORKING_MAINDATA_gabe
drop if _merge==1
drop _merge
gen whole_rack3=wholesale 
	replace whole_rack3 = whole_shell if !missing(whole_shell) & brand_str=="shell"
	replace whole_rack3 = whole_citgo if !missing(whole_citgo) & brand_str=="citgo"
	replace whole_rack3 = whole_bp if !missing(whole_bp) & brand_str=="bp"
	replace whole_rack3 = whole_sunoco if !missing(whole_sunoco) & brand_str=="sunoco"
	replace whole_rack3 = whole_texaco if !missing(whole_texaco) & brand_str=="texaco"
	replace whole_rack3 = whole_murphy if !missing(whole_murphy) & brand_str=="murphy"
	replace whole_rack3 = whole_conoco if !missing(whole_conoco) & brand_str=="conoco"
	replace whole_rack3 = whole_valero if !missing(whole_valero) & brand_str=="valero"
	replace whole_rack3 = whole_chevron if !missing(whole_chevron) & brand_str=="chevron"
	replace whole_rack3 = whole_marathon if !missing(whole_marathon) & brand_str=="marathon"
	replace whole_rack3 = whole_hess if !missing(whole_hess) & brand_str=="hess"
gen nearRack3_city=rack_city
gen nearRack3_st=rack_state
drop rack_city-rack_longitude wholesale-whole_hess  

label var name "Station Name"
label var st_name "Station Name"
label var km_to_nearestID_df "Dist to Nearest Competitor (km)"
label var nearestID_df "Nearest Competitor (station id)"
label var nearestID_df_name "Nearest Competitor (encoded name)"
label var within_1km_df "Number of Competitors w/in 1 km"
label var within_3km_df "Number of Competitors w/in 3 km"
label var within_5km_df "Number of Competitors w/in 5 km"
label var within_10km_df "Number of Competitors w/in 10 km"
label var brand_maj "Branded Major Indicator"
label var branded "Branded Major (Firm Encoded)"
label var brand_str "Branded Major (Firm Decoded)"
label var ret_maj "Major Retailer Indicator"
label var retailer "Major Retailer (Retailer Encoded)"
label var km_to_nearRack1 "Dist to Closest Rack"
label var km_to_nearRack2 "Dist 2nd Closest Rack"
label var km_to_nearRack3 "Dist 3rd Closest Rack"
label var whole_rack1 "Wholesale Price (nearest rack)"
label var nearRack1_city "Nearest Rack City"
label var nearRack1_st "Nearest Rack State"
label var whole_rack2 "Wholesale Price (2nd nearest rack)"
label var nearRack2_city "2nd Nearest Rack City"
label var nearRack2_st "2nd Nearest Rack State"
label var whole_rack3 "Wholesale Price (3rd nearest rack)"
label var nearRack3_city "3rd Nearest Rack City"
label var nearRack3_st "3rd Nearest Rack State"
                              
order FuellocationID date rt_lat rt_lng name retail_unleaded retail_midgrade ///
	retail_premium whole_rack1 whole_rack2 whole_rack3 brand_maj branded ///
	ret_maj km_to_nearestID_df within_1km_df within_3km_df within_5km_df ///
	within_10km_df

save $data_clean\WORKING_MAINDATA, replace